#encoding:UTF-8
import mylib
import re
import MySQLdb
import pythonlibSB


# Base address that is prepended to links scraped without an absolute URL.
URL_TO_GET = 'http://24h.com.vn'

# League lookup table, keyed by the integers 1..9.
# Each value is [league id, text searched for in the scraped HTML]:
# the scraper reads element 0 as `id_giai` and element 1 as the keyword.
config = {
    1: [1, "BÓNG ĐÁ ANH"],
    2: [2, "Tây Ban Nha"],
    3: [3, "BÓNG ĐÁ Ý"],
    4: [4, "BÓNG ĐÁ PHÁP"],
    5: [5, "BÓNG ĐÁ ĐỨC"],
    6: [6, "V-LEAGUE"],
    7: [7, "Champions League"],
    8: [8, "Europa League"],
    9: [9, "WC 2014 - Châu Âu"],
}


# Connection object obtained from mylib.create_conn() when the module is
# loaded; the functions in this file use it for all of their queries.
conn = mylib.create_conn()

def get_url_chung(id_urltype):
    """Return the URL stored in `tbl_urltoget` for the given URL type.

    id_urltype is interpolated directly into the SQL text, so callers must
    only pass trusted values.

    Returns the `url` field of the first result row, or '' when there is no
    row or the lookup fails (best-effort: errors are not propagated).
    """
    try:
        cmd = "SELECT `url` FROM `tbl_urltoget` WHERE id_urltype = '%s';"%id_urltype
        urls = mylib.get_result_sql(conn, cmd)
        if not urls:
            # No matching row: same fallback as any other failure.
            return ''
        return urls[0]['url']
    except Exception:
        # Deliberately broad, but unlike a bare `except:` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return ''

# Keys of `config` that the scraper deliberately leaves out.
_SKIPPED_LEAGUE_KEYS = (6, 9)


def _insert_matching_leagues(html, date_today, regex):
    """Call insert_into_db once per non-skipped league whose keyword occurs in html."""
    for key in sorted(config):
        if key in _SKIPPED_LEAGUE_KEYS:
            continue
        id_giai, keyword = config[key]
        if keyword in html:
            insert_into_db(html, id_giai, date_today, regex)


def _slice_from_marker(text, start_marker, end_marker):
    """Return text from start_marker up to (not including) the next end_marker.

    Returns '' when start_marker is absent, and everything from start_marker
    to the end of text when end_marker is absent.
    """
    start = text.find(start_marker)
    if start == -1:
        return ''
    end = text.find(end_marker, start)
    if end == -1:
        # Without this guard the -1 would be used as a slice bound and
        # silently drop the last character.
        return text[start:]
    return text[start:end]


def get_url_bangxephang():
    """Scrape league links from the page registered under URL type 1.

    (`bangxephang` is Vietnamese for "league table".)

    The page is fetched through mylib.get_data and narrowed down with
    pythonlibSB.s_string. Two regions are scanned:

    * the top article box (`baiviet-TopContent`), and
    * the list that follows `boxDoi-sub-t`, cut into chunks ending at each
      `class="clear"` marker.

    For every region/chunk, each league keyword from `config` (except keys
    6 and 9) that occurs in the text triggers insert_into_db with that
    league's id and today's date.

    Returns None. Any failure is reported by printing "Loi roi" (plus a
    traceback on stderr) and is not propagated.
    """
    top_regex = '(class="baiviet-title")(.*?)(<a.*?)(href=")(.*?)(")'
    item_regex = '(class="div_title_news")(.*?)(<a.*?)(href=")(.*?)(")'
    try:
        date_today = mylib.get_date_today()
        url = get_url_chung(1)
        content = mylib.get_data(url)
        center_content = pythonlibSB.s_string(content,'width="584"','td')

        # Region 1: the highlighted article box at the top of the column.
        green_content = pythonlibSB.s_string(center_content,'class="baiviet-TopContent"','div')
        top_html = _slice_from_marker(green_content,
                                      'class="baiviet-TopContent"',
                                      'class="cap2-boxtop-note"')
        _insert_matching_leagues(top_html, date_today, top_regex)

        # Region 2: the item list between the sub-title and the banner.
        list_html = _slice_from_marker(center_content,
                                       'class="boxDoi-sub-t"',
                                       'class="div-banner300250"')
        chunk_start = list_html.find('class="boxDonItem"')
        while chunk_start != -1:
            chunk_end = list_html.find('class="clear"', chunk_start)
            if chunk_end == -1:
                break
            _insert_matching_leagues(list_html[chunk_start:chunk_end],
                                     date_today, item_regex)
            # NOTE(review): only the first chunk starts at a "boxDonItem"
            # marker; later chunks start just past the previous "clear"
            # marker. Kept as in the original - confirm this is intended.
            chunk_start = chunk_end + 1
    except Exception:
        # Top-level boundary of the script: report and carry on, but keep
        # the cause visible on stderr instead of discarding it.
        import traceback
        print("Loi roi")
        traceback.print_exc()
 
def insert_into_db(in_str,id_giai,date_today,regex):
    """Extract a link from in_str and record it via `insert_url_bangxephang`.

    in_str     -- HTML fragment to search.
    id_giai    -- league id passed as the first procedure argument.
    date_today -- date value passed as the third procedure argument.
    regex      -- pattern whose 5th group captures the link target.

    Does nothing when the pattern does not match. Otherwise the captured
    link is made absolute (URL_TO_GET is prepended unless it already starts
    with http:// or https://) and the stored procedure is called on the
    module-level connection. Returns None; database errors propagate to the
    caller.
    """
    match = re.search(regex, in_str)
    if match is None:
        return

    url_get2 = match.group(5)
    # Only a leading scheme marks an absolute URL; a substring test would
    # mis-handle https:// links and relative links that embed "http://".
    if not url_get2.startswith(('http://', 'https://')):
        url_get2 = URL_TO_GET + url_get2

    # Printed for tracing only; the statement actually executed below is
    # parameterized because the URL comes from scraped (untrusted) HTML.
    print("CALL insert_url_bangxephang('%s','%s','%s')"%(id_giai,url_get2,date_today))

    cursor = conn.cursor(MySQLdb.cursors.DictCursor)
    try:
        cursor.execute("CALL insert_url_bangxephang(%s,%s,%s)",
                       (id_giai, url_get2, date_today))
    finally:
        # Release the cursor even when the call fails.
        cursor.close()
    print("insert thanh cong")
 

# Run the scraper only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    get_url_bangxephang()